# Import required R libraries
library(fpp3)
Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?
global_economy %>%
autoplot(GDP/Population, show.legend=FALSE) +
labs(title= "GDP per capita", y = "$US")
global_economy %>%
filter(Year == "2017") %>%
mutate(GdpPerPop = GDP/Population) %>%
arrange(desc(GdpPerPop))
## # A tsibble: 262 x 10 [1Y]
## # Key: Country [262]
## Country Code Year GDP Growth CPI Imports Exports Population GdpPerPop
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Luxemb… LUX 2017 6.24e10 2.30 111. 194. 230. 599449 104103.
## 2 Macao … MAC 2017 5.04e10 9.10 136. 32.0 79.4 622567 80893.
## 3 Switze… CHE 2017 6.79e11 1.09 98.3 53.9 65.0 8466017 80190.
## 4 Norway NOR 2017 3.99e11 1.92 115. 33.1 35.5 5282223 75505.
## 5 Iceland ISL 2017 2.39e10 3.64 122. 42.8 47.0 341284 70057.
## 6 Ireland IRL 2017 3.34e11 7.80 105. 87.9 120. 4813608 69331.
## 7 Qatar QAT 2017 1.67e11 1.58 116. 37.3 51.0 2639211 63249.
## 8 United… USA 2017 1.94e13 2.27 112. NA NA 325719178 59532.
## 9 North … NAC 2017 2.10e13 2.35 NA NA NA 362492702 58070.
## 10 Singap… SGP 2017 3.24e11 3.62 113. 149. 173. 5612253 57714.
## # … with 252 more rows
For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.
United States GDP from global_economy
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP/Population) +
labs(title= "GDP per capita", y = "$US")
Population adjustment
Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.
aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers" &
State == "Victoria") %>%
mutate(DailyAvgByMonth = Count / days_in_month(Month)) %>%
autoplot(DailyAvgByMonth) +
labs(title= "Slaughter of Victorian Bulls, bullocks and steers", y = "Daily Average by Month")
# Help from: https://stackoverflow.com/questions/30037722/daily-average-to-monthly-total-in-r
Apply calendar adjustment with days_in_month from lubridate library.
Victorian Electricity Demand from vic_elec.
victs <- vic_elec %>%
index_by(Date) %>%
ungroup() %>%
select(-c(Time)) %>%
as_tsibble(index=Date, key=Demand)
head(vic_elec)
## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <date> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012-01-01 TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012-01-01 TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012-01-01 TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012-01-01 TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012-01-01 TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012-01-01 TRUE
head(victs)
## # A tsibble: 6 x 5 [1D]
## # Key: Demand [6]
## Demand Temperature Date Holiday Time
## <dbl> <dbl> <date> <lgl> <dttm>
## 1 2858. 13.8 2014-03-16 FALSE 2014-03-16 04:30:00
## 2 2870. 13.8 2014-03-16 FALSE 2014-03-16 05:00:00
## 3 2871. 13.8 2014-03-16 FALSE 2014-03-16 04:00:00
## 4 2877. 14.9 2012-12-25 TRUE 2012-12-25 05:30:00
## 5 2903. 14.8 2012-12-25 TRUE 2012-12-25 05:00:00
## 6 2905. 13.1 2013-12-25 TRUE 2013-12-25 05:30:00
# as_tsibble(index = Date, key = Temperature) %>%
# autoplot(DailyTotal) +
# labs(title= "Electricity Demand", y = "Daily Total (in MW)")
group_by(Date) %>% summarise(DailyTotal = sum(Demand)) %>% mutate(Quarter = yearquarter(Quarter)) %>% as_tsibble(index = Date, key = DailyTotal) Calendar adjustment, to track by day instead of per 30-minute intervals
Gas production from aus_production.
# From section 3.1
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
#aus_production %>%
# autoplot(Gas) +
# labs(title= "Gas Production", y = "Petajoule")
Box-Cox (mathematical transformation)
Why is a Box-Cox transformation unhelpful for the canadian_gas data?
canadian_gas %>%
autoplot(Volume) +
labs(title= "Monthly Canadian Gas Production", y = "Billions of cubic meters")
lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
The Box-Cox transformations is unhelpful for
canadian_gas because the seasonal variation is already about the same across the whole series. As seen above in the initial plot without transformation and the second plot with a Box-Cox transformation, the transformation doesn’t necessarily tease out the season variation. If anything, the transformation diminishes the impact of the large seasonal swings between 1978 through 1990.
What Box-Cox transformation would you select for your retail data (from Exercise 8 in Section 2.10)?
set.seed(8675309)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries, Turnover) +
labs(title = "Turnover in Queensland Takeaway food services",
subtitle = "Series ID: A3349767W",
y = "Turnover")
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries %>%
autoplot(box_cox(Turnover, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed food services turnover with $\\lambda$ = ",
round(lambda,2))))
For the following series, find an appropriate Box-Cox transformation in order to stabilize the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.
# Tobacco
lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed tobacco production with $\\lambda$ = ",
round(lambda,2))))
# Economy class passengers between Melbourne and Sydney
lambda <- ansett %>%
filter(Airports == 'MEL-SYD' &
Class == 'Economy') %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
ansett %>%
filter(Airports == 'MEL-SYD' &
Class == 'Economy') %>%
autoplot(box_cox(Passengers, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed economy passengers between Mel and Syd with $\\lambda$ = ",
round(lambda,2))))
# Pedestrian counts at Southern Cross Station
lambda <- pedestrian %>%
filter(Sensor == 'Southern Cross Station') %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
pedestrian %>%
filter(Sensor == 'Southern Cross Station') %>%
autoplot(box_cox(Count, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed pedestrian count at Southern Cross Station with $\\lambda$ = ",
round(lambda,2))))
Consider the last five years of the Gas data from aus_production.
gas <- tail(aus_production, 5*4) %>% select(Gas)
head(gas)
## # A tsibble: 6 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?
gas %>%
autoplot(Gas)
Trend-cycle shows an increase over the past five years. And the seasonal variance shows lows in Q1 and highs in Q3.
Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.
# From section 3.4
dc <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components()
dc %>%
autoplot() +
labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")
Do the results support the graphical interpretation from part A?
Yes, the trend line shows an increase from left to right with a plateau in the middle. The seasonal indices shows an almost perfect seasonal variance over the five-year window.
Compute and plot the seasonally adjusted data.
dc %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "Petajoules",
title = "Gas production in Australia") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
# Outlier in beginning
gas_OutFront <- gas
gas_OutFront$Gas[1] <- gas_OutFront$Gas[1] + 300
of_dc <- gas_OutFront %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components()
of_dc %>%
autoplot() +
labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")
of_dc %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "Petajoules",
title = "Gas production in Australia") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
# Outlier in middle
gas_OutMid <- gas
gas_OutMid$Gas[11] <- gas_OutMid$Gas[11] + 300
om_dc <- gas_OutMid %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components()
om_dc %>%
autoplot() +
labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")
om_dc %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "Petajoules",
title = "Gas production in Australia") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
# Outlier in back
gas_OutBack <- gas
gas_OutBack$Gas[20] <- gas_OutBack$Gas[20] + 300
ob_dc <- gas_OutBack %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components()
ob_dc %>%
autoplot() +
labs(title = "Classical multiplicative decomposition of gas production in Australia in petajoules")
ob_dc %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "Petajoules",
title = "Gas production in Australia") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
Does it make any difference if the outlier is near the end rather than in the middle of the time series?
Recall your retail time series data (from Exercise 8 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?
library(seasonal)
set.seed(8675309)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp) +
labs(title =
"Decomposition of Turnover in Queensland Takeaway food services using X-11.")
Yes, the seasonal variance flips over time, and there are a few outliers as identified from the “irregular” chart.
Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.
Is the recession of 1991/1992 visible in the estimated components?